# !/usr/bin/python
# -*- coding: utf-8 -*-
import datetime
import os
import time
import bs4 as bs4
from pip._vendor import requests

from 神方.util.write_excel import write_file

# Rows collected for the Excel export, keyed by row label.  The "序号" entry
# holds the column headers; scraped rows are added under str(index) keys.
data = {"序号": ["省份", "城市", "日期", "行业", "内容"]}

# First listing page to scrape.
url = 'http://www.zgazxxw.com/hn-001011l772-0.html'

def getdate(beforeOfDay):
    """Return the local date ``beforeOfDay`` days before now as ``YYYY-MM-DD``.

    Args:
        beforeOfDay: Number of days to step back; 1 gives yesterday, N gives
            the date N days ago.

    Returns:
        The resulting date formatted with ``%Y-%m-%d``.
    """
    days_back = datetime.timedelta(days=beforeOfDay)
    target = datetime.datetime.now() - days_back
    return target.strftime('%Y-%m-%d')

# Current year as text.  It is prepended to the dates read from the listing
# before they are parsed as %Y-%m-%d.
nowYear = time.strftime('%Y')
# Oldest date of interest: 10 days before today, as YYYY-MM-DD.
nowTime_str = getdate(10)
# The same cutoff as local-time epoch seconds.  mktime takes a struct_time,
# so the text is parsed first.
e_time = time.mktime(
    time.strptime(nowTime_str, "%Y-%m-%d")
)


def getLastDate(trs):
    """Report whether the last data row of a page is still inside the date window.

    The caller uses the result to decide whether the next listing page has to
    be fetched.  This function does not build the next page's URL itself.

    Args:
        trs: Table rows of one listing page.  Each row is called as
            ``row('td')`` to obtain its cells; the fifth cell's ``.string``
            is expected to be a month-day text that parses as ``%m-%d``.

    Returns:
        ``1`` when the last data row is dated on or after the module-level
        cutoff ``e_time``; ``None`` otherwise, including when the page has no
        usable data row.

    Raises:
        ValueError: If the date text cannot be parsed as ``%Y-%m-%d`` once the
            current year has been prepended.
    """
    # Walk backwards to the last row that looks like a data row.  writeValue
    # only records rows with exactly five cells, so the same test is applied
    # here.  An empty page or a trailing non-data row then ends paging
    # instead of raising IndexError/TypeError.
    month_day = None
    for row in reversed(trs):
        cells = row('td')
        if len(cells) == 5 and cells[4].string is not None:
            month_day = cells[4].string
            break
    if month_day is None:
        return None

    # NOTE(review): the rows carry no year, so the current year is assumed.
    # In early January, late-December rows would be read as future dates and
    # keep paging going - confirm whether that case needs handling.
    lastDate = nowYear + '-' + month_day
    print('最后一条的日期是：%s' % lastDate)

    # Compare as whole epoch seconds against the cutoff.
    last_seconds = time.mktime(time.strptime(lastDate, "%Y-%m-%d"))
    if int(last_seconds) - int(e_time) >= 0:
        return 1
    return None


def writeValue(trs, pageNum, index):
    """Record the in-window rows of one listing page, then move to the next page.

    Only rows with exactly five cells are read: the first cell's link title
    is the content, the second cell is the industry and the fifth cell is the
    month-day date.  Rows dated on or after the module-level cutoff ``e_time``
    are stored in the module-level ``data`` dict under ``str(index)``.

    Afterwards ``getLastDate`` is asked whether the last row is still inside
    the window.  If so, the next page is fetched through ``getValue``, which
    calls back into this function.

    Args:
        trs: Table rows of the current listing page.
        pageNum: Number of the current page; the next page is ``pageNum + 1``.
        index: Row number to use for the next recorded entry.
    """
    # Every processed page appends one blank line to a.txt.
    with open('a.txt', 'a') as out:
        out.write('\n')

    for row in trs:
        cells = row('td')
        if len(cells) != 5:
            # Only five-cell rows are treated as entries.
            continue

        content = cells[0].a['title']
        industry = cells[1].string
        date = nowYear + '-' + cells[4].string
        row_seconds = time.mktime(time.strptime(date, "%Y-%m-%d"))

        # Skip rows older than the cutoff; compared as whole epoch seconds.
        if int(row_seconds) < int(e_time):
            continue

        data[str(index)] = ['河南省', '', date, industry, content]
        index += 1

    # Stop unless the last row of this page is still inside the window.
    if getLastDate(trs) != 1:
        return

    next_page = pageNum + 1
    urlNext = 'http://www.zgazxxw.com/hn-001011l772-' + str(next_page) + '.html'
    getValue(urlNext, next_page, index)

def getValue(url, pageNum, index, timeout=10):
    """Download one listing page and hand its table rows to writeValue.

    Args:
        url: Address of the listing page to download.
        pageNum: Page number belonging to ``url``; forwarded unchanged.
        index: Row number for the next recorded entry; forwarded unchanged.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout a stalled connection would block the run indefinitely.

    Raises:
        Whatever ``requests.get`` raises on a connection failure or timeout
        propagates to the caller.
    """
    # NOTE(review): ``requests`` is imported from pip's vendored copy at the
    # top of the file; consider depending on the real package instead.
    response = requests.get(url, timeout=timeout)

    # Decode the body with the encoding detected from its content.
    response.encoding = response.apparent_encoding

    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    table_body = soup.find('tbody')
    if table_body is None:
        # No listing table on this page, so stop paging here.  The rows
        # gathered so far stay in ``data`` and can still be exported.
        print('No <tbody> found, stopping at: %s' % url)
        return

    writeValue(table_body.find_all('tr'), pageNum, index)


if __name__ == '__main__':
    # Date stamp for the output file name, taken before scraping starts.
    now_time = datetime.datetime.now().strftime('%Y%m%d')

    # Scrape starting at page 0, numbering recorded rows from 1.
    pageNum, index = 0, 1
    getValue(url, pageNum, index)

    # Output name: <today>-<cutoff date>, both written as YYYYMMDD.
    cutoff_stamp = nowTime_str.replace('-', '')
    file_path = ''.join(['D:\\招投标信息', now_time, '-', cutoff_stamp, '.xls'])

    # Remove any previous export of the same name before writing.
    if os.path.exists(file_path):
        print('删除文件')
        os.remove(file_path)

    # Write the collected rows to the Excel file.
    write_file(file_path, data, '河南省')

